@inproceedings{zhou2018end,
  title={End-to-End Dense Video Captioning with Masked Transformer},
  author={Zhou, Luowei and Zhou, Yingbo and Corso, Jason J and Socher, Richard and Xiong, Caiming},
  booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages={8739--8748},
  year={2018}
}